package com.common.server.common.util;


import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.common.server.modules.file.entity.FileEntity;
import com.common.server.modules.file.mapper.FileMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.time.LocalDateTime;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Utility for converting Word documents ({@code .doc} and {@code .docx}) into an HTML string.
 * <p>
 * For every picture encountered during conversion a {@link FileEntity} row is inserted through
 * the supplied {@link FileMapper}, and the generated HTML references the picture via the
 * download endpoint followed by the row's code.
 * <p>
 * Author: HuTongFu
 * Date: 2019/6/25 10:28
 */
@Slf4j
public class Word2HtmlUtil {

    /** Path appended to the base URL; the picture code follows it. */
    private static final String DOWNLOAD_PATH = "/api/rest/file/download/";

    /** Status value stored on every picture record created by this class. */
    private static final String IMAGE_STATUS = "6";

    /** Remark stored on every picture record created by this class. */
    private static final String IMAGE_REMARK = "用于 Word2Html 的图片";

    /** Last millisecond value handed out by {@link #nextImageCode()}. */
    private static final AtomicLong LAST_CODE_MILLIS = new AtomicLong();

    /**
     * Converts a binary {@code .doc} file to HTML.
     * <p>
     * Each embedded picture is written to {@code filePath + pictureName}, registered through
     * {@code fileMapper}, and linked in the HTML through the download endpoint.
     *
     * @param filePath   directory prefix of the document; also used as the picture output directory
     *                   (concatenated directly with file names, so it must end with a separator)
     * @param fileName   name of the {@code .doc} file inside {@code filePath}
     * @param url        base URL for picture links; when empty, {@code IPUtils.getLocalURI()} is used
     * @param fileMapper mapper used to count and insert picture records
     * @return the HTML with all CR/LF characters and all double quotes removed
     * @throws Exception if the document cannot be read, converted or serialized
     */
    public static String docToHtml(String filePath, String fileName, String url, FileMapper fileMapper) throws Exception {
        // filePath doubles as the directory the extracted pictures are written to.
        File imageDir = new File(filePath);
        if (!imageDir.exists()) {
            imageDir.mkdirs();
        }

        Document htmlDocument;
        // The input stream was previously never closed; keep it open only for the conversion.
        try (InputStream wordStream = new FileInputStream(filePath + fileName)) {
            HWPFDocument wordDocument = new HWPFDocument(wordStream);
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            String baseUrl = resolveBaseUrl(url);

            // Called once per picture: store the bytes, register the picture, return its link.
            converter.setPicturesManager((content, pictureType, name, width, height) -> {
                String size = new BigDecimal(content.length)
                        .divide(new BigDecimal(1024))
                        .setScale(2, RoundingMode.HALF_UP)
                        .toString() + "KB";
                String code = nextImageCode();
                try (FileOutputStream out = new FileOutputStream(filePath + name)) {
                    out.write(content);
                    // A fresh record per picture, so a failure can never leak a previous picture's data.
                    FileEntity record = newImageRecord(code, baseUrl, filePath, name,
                            pictureType.getExtension(), size, fileMapper);
                    fileMapper.insert(record);
                } catch (Exception e) {
                    // Best effort: one bad picture must not abort the whole conversion.
                    log.error("文档中包含的图片文件未找到", e);
                }
                // On failure this link is dangling, but it always belongs to this picture.
                return baseUrl + DOWNLOAD_PATH + code;
            });

            converter.processDocument(wordDocument);
            htmlDocument = converter.getDocument();
        }

        // Serialize the DOM tree produced by the converter as UTF-8 HTML.
        ByteArrayOutputStream htmlBytes = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(htmlBytes));
        String html = htmlBytes.toString("utf-8");

        // NOTE(review): every double quote is stripped, including attribute delimiters; presumably
        // so the markup can be embedded without escaping. Confirm with callers before changing.
        return html.replaceAll("\\r|\\n", "").replace("\"", "");
    }


    /**
     * Converts a {@code .docx} file to HTML.
     * <p>
     * The generated HTML is written to {@code filePath + fileName}, i.e. over the source document,
     * and then read back through {@code FileUtil.readfile}. A picture record is inserted through
     * {@code fileMapper} for every image URI the converter asks to resolve.
     *
     * @param filePath   directory prefix of the document (concatenated directly with file names)
     * @param fileName   name of the {@code .docx} file inside {@code filePath}
     * @param url        base URL for picture links; when empty, {@code IPUtils.getLocalURI()} is used
     * @param fileMapper mapper used to count and insert picture records
     * @return the HTML with all CR/LF characters removed
     * @throws Exception if the document cannot be read or converted, or a picture record cannot be inserted
     */
    public static String docxToHtml(String filePath, String fileName, String url, FileMapper fileMapper) throws Exception {
        String documentPath = filePath + fileName;

        // The same path is overwritten with the HTML below, so load the document and release
        // the input stream first (it was previously never closed).
        XWPFDocument document;
        try (InputStream docxStream = new FileInputStream(documentPath)) {
            document = new XWPFDocument(docxStream);
        }

        XHTMLOptions options = XHTMLOptions.create();
        // Directory recorded for each picture; presumably where the extractor below places
        // them relative to filePath. TODO confirm.
        String mediaDir = filePath + "word" + File.separator + "media" + File.separator;
        // Maps each image URI in the document to a download link and registers the picture.
        options.URIResolver(imgUrl -> {
            String imageName = imgUrl.substring(imgUrl.lastIndexOf("/") + 1);
            String type = imageName.substring(imageName.lastIndexOf(".") + 1).toLowerCase(Locale.ROOT);
            String baseUrl = resolveBaseUrl(url);
            String code = nextImageCode();
            fileMapper.insert(newImageRecord(code, baseUrl, mediaDir, imageName, type, "unknown", fileMapper));
            // Use the resolved base URL: the raw url may be null or empty, which used to
            // produce links starting with "null".
            return baseUrl + DOWNLOAD_PATH + code;
        });
        // Folder the pictures are extracted into.
        options.setExtractor(new FileImageExtractor(new File(filePath)));

        try (OutputStream htmlFile = new FileOutputStream(documentPath);
             Writer htmlWriter = new OutputStreamWriter(htmlFile, "utf-8")) {
            XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
            xhtmlConverter.convert(document, htmlWriter, options);
        }
        return FileUtil.readfile(documentPath).replaceAll("\\r|\\n", "");
    }

    /** Returns {@code url} when it is not empty, otherwise the local URI reported by {@code IPUtils}. */
    private static String resolveBaseUrl(String url) {
        return StringUtils.isNotEmpty(url) ? url : IPUtils.getLocalURI();
    }

    /**
     * Produces a picture code of the form {@code "I" + millis}. The numeric part is strictly
     * increasing across calls in this JVM, so two pictures saved within the same millisecond
     * no longer receive the same code.
     */
    private static String nextImageCode() {
        long now = System.currentTimeMillis();
        long unique = LAST_CODE_MILLIS.updateAndGet(last -> Math.max(now, last + 1));
        return "I" + unique;
    }

    /** Builds a fully populated, not yet inserted, picture record. */
    private static FileEntity newImageRecord(String code, String baseUrl, String directory, String name,
                                             String type, String size, FileMapper fileMapper) {
        FileEntity record = new FileEntity();
        record.setId(StringUtils.uuidGenerator());
        record.setCode(code);
        record.setIp(baseUrl);
        record.setUrl(directory);
        record.setName(name);
        record.setOldName(name);
        record.setType(type);
        record.setSize(size);
        record.setAddDate(LocalDateTime.now());
        // Order index is the number of rows counted before this record is inserted.
        record.setOrderIndex(String.valueOf(fileMapper.selectCount(new QueryWrapper<>(new FileEntity()))));
        record.setStatus(IMAGE_STATUS);
        record.setRemark(IMAGE_REMARK);
        return record;
    }
}
